{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(20.25, 9.224288590455092)"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sb3_contrib import TRPO\n",
    "from stable_baselines3.common.evaluation import evaluate_policy\n",
    "\n",
    "model = TRPO(policy='MlpPolicy', env='CartPole-v1', verbose=0)\n",
    "\n",
    "evaluate_policy(model,\n",
    "                model.get_env(),\n",
    "                n_eval_episodes=20,\n",
    "                deterministic=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #800080; text-decoration-color: #800080\"> 100%</span> <span style=\"color: #729c1f; text-decoration-color: #729c1f\">━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━</span> <span style=\"color: #008000; text-decoration-color: #008000\">20,473/20,000 </span> [ <span style=\"color: #808000; text-decoration-color: #808000\">0:00:17</span> &lt; <span style=\"color: #008080; text-decoration-color: #008080\">0:00:00</span> , <span style=\"color: #800000; text-decoration-color: #800000\">1,169 it/s</span> ]\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\u001b[35m 100%\u001b[0m \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20,473/20,000 \u001b[0m [ \u001b[33m0:00:17\u001b[0m < \u001b[36m0:00:00\u001b[0m , \u001b[31m1,169 it/s\u001b[0m ]\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
      ],
      "text/plain": []
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
       "</pre>\n"
      ],
      "text/plain": [
       "\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "(364.8, 96.85329111599668)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#训练\n",
    "model.learn(total_timesteps=2_0000, progress_bar=True)\n",
    "\n",
    "evaluate_policy(model,\n",
    "                model.get_env(),\n",
    "                n_eval_episodes=20,\n",
    "                deterministic=False)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "include_colab_link": true,
   "name": "Copie de Unit 1: Train your first Deep Reinforcement Learning Agent 🚀.ipynb",
   "private_outputs": true,
   "provenance": []
  },
  "gpuClass": "standard",
  "kernelspec": {
   "display_name": "Python [conda env:cuda117]",
   "language": "python",
   "name": "conda-env-cuda117-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  },
  "vscode": {
   "interpreter": {
    "hash": "ed7f8024e43d3b8f5ca3c5e1a8151ab4d136b3ecee1e3fd59e0766ccc55e1b10"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
